/*
 *  Copyright (C) 2004 Cidero, Inc.
 *
 *  Permission is hereby granted to any person obtaining a copy of 
 *  this software to use, copy, modify, merge, publish, and distribute
 *  the software for any non-commercial purpose, subject to the
 *  following conditions:
 *  
 *  The above copyright notice and this permission notice shall be included
 *  in all copies or substantial portions of the Software.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
 *  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 *  LIABILITY IN CONNECTION WITH THE SOFTWARE.
 * 
 *  File: $RCSfile: XMLUtil.java,v $
 *
 */
package com.cidero.util;

import java.util.logging.Logger;
import java.util.logging.Level;

/**
 *  XML utility.  A few handy extras...
 */
public class XMLUtil
{
  private static final Logger logger = Logger.getLogger("com.cidero.util");

  /**
   *  Longest run of letters accepted as an entity name between the
   *  '&' and the closing ';'.
   */
  private static final int MAX_ENTITY_NAME_LENGTH = 5;

  /**
   *  Escape every ampersand that does not start an entity reference.
   *
   *  XML uses '&' as the entity reference tag:
   *
   *     {@code &gt;}    ->   '>'
   *     {@code &lt;}    ->   '<'
   *     {@code &apos;}  ->   '''
   *     {@code &quot;}  ->   '"'
   *     {@code &amp;}   ->   '&'
   *
   *  A normal '&' must be represented by the {@code &amp;} entity to be
   *  valid XML.  Sometimes devices forget to encode this properly.  This
   *  routine looks for all occurrences of non-entity ampersands in an XML
   *  string and converts them to {@code &amp;}.
   *
   *  An ampersand is treated as the start of an entity reference when it
   *  is followed by one to {@value #MAX_ENTITY_NAME_LENGTH} letters (as
   *  judged by {@link Character#isLetter(char)}) and then a ';'.  The name
   *  itself is not checked against the list above.  Anything else - an
   *  ampersand at the very end of the input, one followed by a space or
   *  digit, one directly followed by ';', or a numeric character
   *  reference such as {@code &#38;} - is considered a bare ampersand
   *  and is escaped.
   *
   *  @param xml  XML text to clean up; may be null
   *  @return  the text with bare ampersands replaced by {@code &amp;},
   *           or null if {@code xml} was null
   */
  public static String escapeNonEntityAmpersands( String xml )
  {
    if( xml == null )
    {
      return null;
    }

    int inLength = xml.length();

    // Leave a little headroom for the extra "amp;" characters
    StringBuilder outBuf = new StringBuilder( inLength + 16 );

    for( int i = 0; i < inLength; i++ )
    {
      char c = xml.charAt( i );

      if( (c == '&') && ! isEntityReferenceAt( xml, i ) )
      {
        outBuf.append( "&amp;" );
      }
      else
      {
        outBuf.append( c );
      }
    }

    return outBuf.toString();
  }

  /**
   *  Test whether the '&' at ampIndex starts an entity reference, i.e. is
   *  followed by 1 to MAX_ENTITY_NAME_LENGTH letters and a closing ';'.
   */
  private static boolean isEntityReferenceAt( String xml, int ampIndex )
  {
    int length = xml.length();
    int nameStart = ampIndex + 1;
    int scanLimit = Math.min( nameStart + MAX_ENTITY_NAME_LENGTH, length );

    // Skip over the candidate entity name
    int j = nameStart;
    while( (j < scanLimit) && Character.isLetter( xml.charAt( j ) ) )
    {
      j++;
    }

    // The name must be non-empty, and the terminator must actually exist:
    // j can equal length when the input ends inside the candidate name,
    // so bounds-check before reading the character.
    return (j > nameStart) && (j < length) && (xml.charAt( j ) == ';');
  }

  /**
   *  Simple command-line smoke test: prints a few valid and invalid
   *  sample strings before and after escaping.
   *
   *  @param args  ignored
   */
  public static void main( String[] args )
  {
    String xml1 = "Valid XML Test1  M &amp; M";
    String xml2 = "Valid XML Test2  M &gt; M";
    String xml3 = "Invalid XML Test3  M & M";
    String xml4 = "Invalid XML Test4  &abr=4";

    System.out.println("Test1 Input: " + xml1 );
    System.out.println("Test1 Output: " + escapeNonEntityAmpersands( xml1 ) );
    System.out.println("Test2 Input: " + xml2 );
    System.out.println("Test2 Output: " + escapeNonEntityAmpersands( xml2 ) );
    System.out.println("Test3 Input: " + xml3 );
    System.out.println("Test3 Output: " + escapeNonEntityAmpersands( xml3 ) );
    System.out.println("Test4 Input: " + xml4 );
    System.out.println("Test4 Output: " + escapeNonEntityAmpersands( xml4 ) );
  }

}

